{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Interpolation/Extrapolation Python code \n",
    "\n",
    "### This is the code for the interpolation or extrapolation calculation on Google Earth Engine through python"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Import libraries we need for calculation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import the libraries\n",
    "import ee\n",
    "import pandas as pd\n",
    "import os\n",
    "import numpy as np\n",
    "import random\n",
    "from random import sample\n",
    "from scipy.spatial import ConvexHull\n",
    "from sklearn.decomposition import PCA\n",
    "from itertools import combinations\n",
    "import itertools \n",
    "import geopandas as gpd\n",
    "from sklearn.metrics import r2_score\n",
    "from termcolor import colored # this is allocate colour and fonts type for the print title and text\n",
    "from IPython.display import display, HTML"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "#set the working directory of local drive for Grid search result table loading\n",
    "# os.getcwd()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Intialize the ee API connection\n",
    "ee.Initialize()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Because the Interpolation and extropolation has two models one for present models while one for the potential models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# define the list of column names\n",
    "selectedCols = ['Aridity_Index',\n",
    "                'CHELSA_Annual_Mean_Temperature',\n",
    "                'CHELSA_Annual_Precipitation',\n",
    "                'CHELSA_Isothermality',\n",
    "                'CHELSA_Max_Temperature_of_Warmest_Month',\n",
    "                'CHELSA_Mean_Diurnal_Range',\n",
    "                'CHELSA_Mean_Temperature_of_Coldest_Quarter',\n",
    "                'CHELSA_Mean_Temperature_of_Driest_Quarter',\n",
    "                'CHELSA_Mean_Temperature_of_Warmest_Quarter',\n",
    "                'CHELSA_Mean_Temperature_of_Wettest_Quarter',\n",
    "                'CHELSA_Min_Temperature_of_Coldest_Month',\n",
    "                'CHELSA_Precipitation_Seasonality',\n",
    "                'CHELSA_Precipitation_of_Coldest_Quarter',\n",
    "                'CHELSA_Precipitation_of_Driest_Month',\n",
    "                'CHELSA_Precipitation_of_Driest_Quarter',\n",
    "                'CHELSA_Precipitation_of_Warmest_Quarter',\n",
    "                'CHELSA_Precipitation_of_Wettest_Month',\n",
    "                'CHELSA_Precipitation_of_Wettest_Quarter',\n",
    "                'CHELSA_Temperature_Annual_Range',\n",
    "                'CHELSA_Temperature_Seasonality',\n",
    "                'Depth_to_Water_Table',\n",
    "                'EarthEnvTopoMed_Eastness',\n",
    "                'EarthEnvTopoMed_Elevation',\n",
    "                'EarthEnvTopoMed_Northness',\n",
    "                'EarthEnvTopoMed_ProfileCurvature',\n",
    "                'EarthEnvTopoMed_Roughness',\n",
    "                'EarthEnvTopoMed_Slope',\n",
    "                'SG_Absolute_depth_to_bedrock',\n",
    "                'WorldClim2_SolarRadiation_AnnualMean',\n",
    "                'WorldClim2_WindSpeed_AnnualMean',\n",
    "                'EarthEnvCloudCover_MODCF_interannualSD',\n",
    "                'EarthEnvCloudCover_MODCF_intraannualSD',\n",
    "                'EarthEnvCloudCover_MODCF_meanannual',\n",
    "                'EarthEnvTopoMed_AspectCosine',\n",
    "                'EarthEnvTopoMed_AspectSine',\n",
    "                'LandCoverClass_Cultivated_and_Managed_Vegetation',\n",
    "                'Human_Disturbance',\n",
    "                'LandCoverClass_Urban_Builtup',\n",
    "                'SG_Clay_Content_0_100cm',\n",
    "                'SG_Coarse_fragments_0_100cm',\n",
    "                'SG_Sand_Content_0_100cm',\n",
    "                'SG_Silt_Content_0_100cm',\n",
    "                'SG_Soil_pH_H2O_0_100cm',\n",
    "                'cropland',\n",
    "                'grazing',\n",
    "                \"pasture\",\n",
    "                \"rangeland\",\n",
    "                \"PresentTreeCover\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 514170 entries, 0 to 526095\n",
      "Data columns (total 49 columns):\n",
      " #   Column                                            Non-Null Count   Dtype  \n",
      "---  ------                                            --------------   -----  \n",
      " 0   Aridity_Index                                     514170 non-null  float64\n",
      " 1   CHELSA_Annual_Mean_Temperature                    514170 non-null  float64\n",
      " 2   CHELSA_Annual_Precipitation                       514170 non-null  float64\n",
      " 3   CHELSA_Isothermality                              514170 non-null  float64\n",
      " 4   CHELSA_Max_Temperature_of_Warmest_Month           514170 non-null  float64\n",
      " 5   CHELSA_Mean_Diurnal_Range                         514170 non-null  float64\n",
      " 6   CHELSA_Mean_Temperature_of_Coldest_Quarter        514170 non-null  float64\n",
      " 7   CHELSA_Mean_Temperature_of_Driest_Quarter         514170 non-null  float64\n",
      " 8   CHELSA_Mean_Temperature_of_Warmest_Quarter        514170 non-null  float64\n",
      " 9   CHELSA_Mean_Temperature_of_Wettest_Quarter        514170 non-null  float64\n",
      " 10  CHELSA_Min_Temperature_of_Coldest_Month           514170 non-null  float64\n",
      " 11  CHELSA_Precipitation_Seasonality                  514170 non-null  float64\n",
      " 12  CHELSA_Precipitation_of_Coldest_Quarter           514170 non-null  float64\n",
      " 13  CHELSA_Precipitation_of_Driest_Month              514170 non-null  float64\n",
      " 14  CHELSA_Precipitation_of_Driest_Quarter            514170 non-null  float64\n",
      " 15  CHELSA_Precipitation_of_Warmest_Quarter           514170 non-null  float64\n",
      " 16  CHELSA_Precipitation_of_Wettest_Month             514170 non-null  float64\n",
      " 17  CHELSA_Precipitation_of_Wettest_Quarter           514170 non-null  float64\n",
      " 18  CHELSA_Temperature_Annual_Range                   514170 non-null  float64\n",
      " 19  CHELSA_Temperature_Seasonality                    514170 non-null  float64\n",
      " 20  Depth_to_Water_Table                              514170 non-null  float64\n",
      " 21  EarthEnvTopoMed_Eastness                          514170 non-null  float64\n",
      " 22  EarthEnvTopoMed_Elevation                         514170 non-null  float64\n",
      " 23  EarthEnvTopoMed_Northness                         514170 non-null  float64\n",
      " 24  EarthEnvTopoMed_ProfileCurvature                  514170 non-null  float64\n",
      " 25  EarthEnvTopoMed_Roughness                         514170 non-null  float64\n",
      " 26  EarthEnvTopoMed_Slope                             514170 non-null  float64\n",
      " 27  SG_Absolute_depth_to_bedrock                      514170 non-null  float64\n",
      " 28  WorldClim2_SolarRadiation_AnnualMean              514170 non-null  float64\n",
      " 29  WorldClim2_WindSpeed_AnnualMean                   514170 non-null  float64\n",
      " 30  EarthEnvCloudCover_MODCF_interannualSD            514170 non-null  int64  \n",
      " 31  EarthEnvCloudCover_MODCF_intraannualSD            514170 non-null  int64  \n",
      " 32  EarthEnvCloudCover_MODCF_meanannual               514170 non-null  int64  \n",
      " 33  EarthEnvTopoMed_AspectCosine                      514170 non-null  float64\n",
      " 34  EarthEnvTopoMed_AspectSine                        514170 non-null  float64\n",
      " 35  LandCoverClass_Cultivated_and_Managed_Vegetation  514170 non-null  int64  \n",
      " 36  Human_Disturbance                                 514170 non-null  float64\n",
      " 37  LandCoverClass_Urban_Builtup                      514170 non-null  int64  \n",
      " 38  SG_Clay_Content_0_100cm                           514170 non-null  float64\n",
      " 39  SG_Coarse_fragments_0_100cm                       514170 non-null  float64\n",
      " 40  SG_Sand_Content_0_100cm                           514170 non-null  float64\n",
      " 41  SG_Silt_Content_0_100cm                           514170 non-null  float64\n",
      " 42  SG_Soil_pH_H2O_0_100cm                            514170 non-null  float64\n",
      " 43  cropland                                          514170 non-null  float64\n",
      " 44  grazing                                           514170 non-null  float64\n",
      " 45  pasture                                           514170 non-null  float64\n",
      " 46  rangeland                                         514170 non-null  float64\n",
      " 47  PresentTreeCover                                  514170 non-null  float64\n",
      " 48  WDPA                                              514170 non-null  int64  \n",
      "dtypes: float64(43), int64(6)\n",
      "memory usage: 196.1 MB\n",
      "Composite Bands ['Aridity_Index', 'CHELSA_Annual_Mean_Temperature', 'CHELSA_Annual_Precipitation', 'CHELSA_Isothermality', 'CHELSA_Max_Temperature_of_Warmest_Month', 'CHELSA_Mean_Diurnal_Range', 'CHELSA_Mean_Temperature_of_Coldest_Quarter', 'CHELSA_Mean_Temperature_of_Driest_Quarter', 'CHELSA_Mean_Temperature_of_Warmest_Quarter', 'CHELSA_Mean_Temperature_of_Wettest_Quarter', 'CHELSA_Min_Temperature_of_Coldest_Month', 'CHELSA_Precipitation_Seasonality', 'CHELSA_Precipitation_of_Coldest_Quarter', 'CHELSA_Precipitation_of_Driest_Month', 'CHELSA_Precipitation_of_Driest_Quarter', 'CHELSA_Precipitation_of_Warmest_Quarter', 'CHELSA_Precipitation_of_Wettest_Month', 'CHELSA_Precipitation_of_Wettest_Quarter', 'CHELSA_Temperature_Annual_Range', 'CHELSA_Temperature_Seasonality', 'Depth_to_Water_Table', 'EarthEnvTopoMed_Eastness', 'EarthEnvTopoMed_Elevation', 'EarthEnvTopoMed_Northness', 'EarthEnvTopoMed_ProfileCurvature', 'EarthEnvTopoMed_Roughness', 'EarthEnvTopoMed_Slope', 'SG_Absolute_depth_to_bedrock', 'WorldClim2_SolarRadiation_AnnualMean', 'WorldClim2_WindSpeed_AnnualMean', 'EarthEnvCloudCover_MODCF_interannualSD', 'EarthEnvCloudCover_MODCF_intraannualSD', 'EarthEnvCloudCover_MODCF_meanannual', 'EarthEnvTopoMed_AspectCosine', 'EarthEnvTopoMed_AspectSine', 'LandCoverClass_Cultivated_and_Managed_Vegetation', 'Human_Disturbance', 'LandCoverClass_Urban_Builtup', 'SG_Clay_Content_0_100cm', 'SG_Coarse_fragments_0_100cm', 'SG_Sand_Content_0_100cm', 'SG_Silt_Content_0_100cm', 'SG_Soil_pH_H2O_0_100cm', 'cropland', 'grazing', 'pasture', 'rangeland', 'PresentTreeCover', 'WDPA']\n"
     ]
    }
   ],
   "source": [
    "# Import the data and view a summary of it\n",
    "# load my own data table\n",
    "presentData = pd.read_csv('Data/vData/PCA_ConvexHull_IntExt/20230126_Merged_Covariates_sampled_dataset_outliers_cleaned_for_Figure.csv').dropna()\n",
    "presentData = presentData[selectedCols+['WDPA']]\n",
    "# presentData = presentData.astype({\"EarthEnvCloudCover_MODCF_interannualSD\":'float',\n",
    "#                                   \"EarthEnvCloudCover_MODCF_intraannualSD\":'float',\n",
    "#                                   \"EarthEnvCloudCover_MODCF_meanannual\":'float',\n",
    "#                                   \"LandCoverClass_Cultivated_and_Managed_Vegetation\":'float',\n",
    "#                                   \"LandCoverClass_Urban_Builtup\":'float',\n",
    "#                                   \"WDPA\":'int'})\n",
    "\n",
    "# drop the na columns\n",
    "presentData.info()\n",
    "presentData.describe()\n",
    "presentData.head(15)\n",
    "# Instantiate the composite that was used to sample the points\n",
    "# compositeImage = ee.Image(\"WORLDCLIM/V1/BIO\")\n",
    "fullCompositeImage = ee.Image(\"users/leonidmoore/ForestBiomass/20200915_Forest_Biomass_Predictors_Image\").toDouble()\n",
    "# presentCompositeImage = fullCompositeImage.select(selectedCols).toDouble().addBands(fullCompositeImage.select('WDPA').toInt64())\n",
    "presentCompositeImage = fullCompositeImage.select(selectedCols+['WDPA'])\n",
    "print('Composite Bands',presentCompositeImage.bandNames().getInfo())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Input the proportion of variance that you would like to cover when running the script\n",
    "propOfVariance = 90"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def assessExtrapolation(importedData, compositeImage, propOfVariance):\n",
    "    \n",
    "    # Excise the columns of interest from the data frame\n",
    "    variablesOfInterest = importedData #drop(['system:index', '.geo'], axis=1)\n",
    "    \n",
    "    # Compute the mean and standard deviation of each band, then standardize the point data\n",
    "    meanVector = variablesOfInterest.mean()\n",
    "    stdVector = variablesOfInterest.std()\n",
    "    standardizedData = (variablesOfInterest-meanVector)/stdVector\n",
    "    \n",
    "    # Then standardize the composite from which the points were sampled\n",
    "    meanList = meanVector.tolist()\n",
    "    stdList = stdVector.tolist()\n",
    "    bandNames = list(meanVector.index)\n",
    "    meanImage = ee.Image(meanList).rename(bandNames)\n",
    "    stdImage = ee.Image(stdList).rename(bandNames)\n",
    "    standardizedImage = compositeImage.subtract(meanImage).divide(stdImage)\n",
    "    \n",
    "    # Run a PCA on the point samples\n",
    "    pcaOutput = PCA()\n",
    "    pcaOutput.fit(standardizedData)\n",
    "    \n",
    "    # Save the cumulative variance represented by each PC\n",
    "    cumulativeVariance = np.cumsum(np.round(pcaOutput.explained_variance_ratio_, decimals=4)*100)\n",
    "    \n",
    "    # Make a list of PC names for future organizational purposes\n",
    "    pcNames = ['PC'+str(x) for x in range(1,variablesOfInterest.shape[1]+1)]\n",
    "    \n",
    "    # Get the PC loadings as a data frame\n",
    "    loadingsDF = pd.DataFrame(pcaOutput.components_,columns=[str(x)+'_Loads' for x in bandNames],index=pcNames)\n",
    "    \n",
    "    # Get the original data transformed into PC space\n",
    "    transformedData = pd.DataFrame(pcaOutput.fit_transform(standardizedData,standardizedData),columns=pcNames)\n",
    "    \n",
    "    # Make principal components images, multiplying the standardized image by each of the eigenvectors\n",
    "    # Collect each one of the images in a single image collection;\n",
    "    \n",
    "    # First step: make an image collection wherein each image is a PC loadings image\n",
    "    listOfLoadings = ee.List(loadingsDF.values.tolist());\n",
    "    eePCNames = ee.List(pcNames)\n",
    "    zippedList = eePCNames.zip(listOfLoadings)\n",
    "    def makeLoadingsImage(zippedValue):\n",
    "        return ee.Image.constant(ee.List(zippedValue).get(1)).rename(bandNames).set('PC',ee.List(zippedValue).get(0))\n",
    "    loadingsImageCollection = ee.ImageCollection(zippedList.map(makeLoadingsImage))\n",
    "    \n",
    "    # Second step: multiply each of the loadings image by the standardized image and reduce it using a \"sum\"\n",
    "    # to finalize the matrix multiplication\n",
    "    def finalizePCImages(loadingsImage):\n",
    "        return ee.Image(loadingsImage).multiply(standardizedImage).reduce('sum').rename([ee.String(ee.Image(loadingsImage).get('PC'))]).set('PC',ee.String(ee.Image(loadingsImage).get('PC')))\n",
    "    principalComponentsImages = loadingsImageCollection.map(finalizePCImages)\n",
    "    \n",
    "    # Choose how many principal components are of interest in this analysis based on amount of\n",
    "    # variance explained\n",
    "    numberOfComponents = sum(i < propOfVariance for i in cumulativeVariance)+1\n",
    "    print('Number of Principal Components being used:',numberOfComponents)\n",
    "    \n",
    "    # Compute the combinations of the principal components being used to compute the 2-D convex hulls\n",
    "    tupleCombinations = list(combinations(list(pcNames[0:numberOfComponents]),2))\n",
    "    print('Number of Combinations being used:',len(tupleCombinations))\n",
    "    \n",
    "    # Generate convex hulls for an example of the principal components of interest\n",
    "    cHullCoordsList = list()\n",
    "    for c in tupleCombinations:\n",
    "        firstPC = c[0]\n",
    "        secondPC = c[1]\n",
    "        outputCHull = ConvexHull(transformedData[[firstPC,secondPC]])\n",
    "        listOfCoordinates = transformedData.loc[outputCHull.vertices][[firstPC,secondPC]].values.tolist()\n",
    "        flattenedList = [val for sublist in listOfCoordinates for val in sublist]\n",
    "        cHullCoordsList.append(flattenedList)\n",
    "    \n",
    "    # Reformat the image collection to an image with band names that can be selected programmatically\n",
    "    pcImage = principalComponentsImages.toBands().rename(pcNames)\n",
    "    \n",
    "    # Generate an image collection with each PC selected with it's matching PC\n",
    "    listOfPCs = ee.List(tupleCombinations)\n",
    "    listOfCHullCoords = ee.List(cHullCoordsList)\n",
    "    zippedListPCsAndCHulls = listOfPCs.zip(listOfCHullCoords)\n",
    "    \n",
    "    def makeToClassifyImages(zippedListPCsAndCHulls):\n",
    "        imageToClassify = pcImage.select(ee.List(zippedListPCsAndCHulls).get(0)).set('CHullCoords',ee.List(zippedListPCsAndCHulls).get(1))\n",
    "        classifiedImage = imageToClassify.rename('u','v').classify(ee.Classifier.spectralRegion([imageToClassify.get('CHullCoords')]))\n",
    "        return classifiedImage\n",
    "    classifedImages = ee.ImageCollection(zippedListPCsAndCHulls.map(makeToClassifyImages))\n",
    "    finalImageToExport = classifedImages.sum().divide(ee.Image.constant(len(tupleCombinations)))\n",
    "    \n",
    "    return finalImageToExport"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of Principal Components being used: 19\n",
      "Number of Combinations being used: 171\n"
     ]
    }
   ],
   "source": [
    "# Apply the function\n",
    "finalImageToExportPresent = assessExtrapolation(presentData, presentCompositeImage, propOfVariance)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# define the boudary of the exportation. keep it as \n",
    "unboundedGeo = ee.Geometry.Polygon([-180, 88, 0, 88, 180, 88, 180, -88, 0, -88, -180, -88], None, False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Export the calucation results to Google earth engine Asset folder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'state': 'READY',\n",
       " 'description': 'GS_IntExt_Image',\n",
       " 'creation_timestamp_ms': 1675775214892,\n",
       " 'update_timestamp_ms': 1675775214892,\n",
       " 'start_timestamp_ms': 0,\n",
       " 'task_type': 'EXPORT_IMAGE',\n",
       " 'id': '7UCXEM2GGIHZNOIB6CB6FISK',\n",
       " 'name': 'projects/earthengine-legacy/operations/7UCXEM2GGIHZNOIB6CB6FISK'}"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# export the present IntExt calcualtion results into the local folder\n",
    "taskPresent = ee.batch.Export.image.toAsset(\n",
    "    image = finalImageToExportPresent,\n",
    "    description = 'GS_IntExt_Image',\n",
    "    assetId = 'users/leonidmoore/ForestBiomass/IntExt/IntExt_of_GS_Models',\n",
    "    region = unboundedGeo,\n",
    "    crs = 'EPSG:4326',\n",
    "    crsTransform = [0.008333333333333333,0,-180,0,-0.008333333333333333,90],\n",
    "    maxPixels = 1e13)\n",
    "# execute the calculation which will be present at the Google earth engine web browser interface\n",
    "taskPresent.start()\n",
    "# show the task status\n",
    "taskPresent.status()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  },
  "vscode": {
   "interpreter": {
    "hash": "e9d0d6ca7801c1f2a6a062e0d82d29539109b66c829362c3982d539fdc2e2bbb"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
